In [7]:
import pandas as pd
import numpy as np

import os
import sys

# librosa is a Python library for analyzing audio and music. It can be used to extract the data from the audio files we will see it later.
import librosa
import librosa.display
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import train_test_split

# to play the audio files
from IPython.display import Audio

import keras
from keras.callbacks import ReduceLROnPlateau
from keras.models import Sequential
from keras.layers import Dense, Conv1D, MaxPooling1D, Flatten, Dropout, BatchNormalization
from keras.utils import to_categorical

from keras.callbacks import ModelCheckpoint

import warnings
if not sys.warnoptions:
    warnings.simplefilter("ignore")
warnings.filterwarnings("ignore", category=DeprecationWarning) 
In [8]:
import zipfile
with zipfile.ZipFile('archive baby.zip', 'r') as zip_ref:
    zip_ref.extractall('./kaggle/input/surrey-audiovisual-expressed-emotion-savee')
In [9]:
import zipfile
with zipfile.ZipFile('archive corner.zip', 'r') as zip_ref:
    zip_ref.extractall('./kaggle/input/toronto-emotional-speech-set-te')
    
In [10]:
import zipfile
with zipfile.ZipFile('archive flow.zip', 'r') as zip_ref:
    zip_ref.extractall('./kaggle/input/ravdess-emotional-speech-audio')
In [11]:
import zipfile
with zipfile.ZipFile('archive stay.zip', 'r') as zip_ref:
    zip_ref.extractall('./kaggle/input/cremad')
In [12]:
Ravdess = "./kaggle/input/ravdess-emotional-speech-audio/audio_speech_actors_01-24/"

Crema = "./kaggle/input/cremad/AudioWAV/"
Tess = "./kaggle/input/toronto-emotional-speech-set-tes/TESS Toronto emotional speech set data/TESS Toronto emotional speech set data/"
Savee = "./kaggle/input/surrey-audiovisual-expressed-emotion-savee/ALL/"
In [13]:
ravdess_directory_list = os.listdir(Ravdess)

file_emotion = []
file_path = []
for dir in ravdess_directory_list:
    # as their are 20 different actors in our previous directory we need to extract files for each actor.
    actor = os.listdir(Ravdess + dir)
    for file in actor:
        part = file.split('.')[0]
        part = part.split('-')
        # third part in each file represents the emotion associated to that file.
        file_emotion.append(int(part[2]))
        file_path.append(Ravdess + dir + '/' + file)
        
# dataframe for emotion of files
emotion_df = pd.DataFrame(file_emotion, columns=['Emotions'])

# dataframe for path of files.
path_df = pd.DataFrame(file_path, columns=['Path'])
Ravdess_df = pd.concat([emotion_df, path_df], axis=1)

# changing integers to actual emotions.
Ravdess_df.Emotions.replace({1:'neutral', 2:'calm', 3:'happy', 4:'sad', 5:'angry', 6:'fear', 7:'disgust', 8:'surprise'}, inplace=True)
Ravdess_df.head()
Out[13]:
Emotions Path
0 neutral ./kaggle/input/ravdess-emotional-speech-audio/...
1 neutral ./kaggle/input/ravdess-emotional-speech-audio/...
2 neutral ./kaggle/input/ravdess-emotional-speech-audio/...
3 neutral ./kaggle/input/ravdess-emotional-speech-audio/...
4 calm ./kaggle/input/ravdess-emotional-speech-audio/...
In [14]:
crema_directory_list = os.listdir(Crema)

file_emotion = []
file_path = []

for file in crema_directory_list:
    # storing file paths
    file_path.append(Crema + file)
    # storing file emotions
    part=file.split('_')
    if part[2] == 'SAD':
        file_emotion.append('sad')
    elif part[2] == 'ANG':
        file_emotion.append('angry')
    elif part[2] == 'DIS':
        file_emotion.append('disgust')
    elif part[2] == 'FEA':
        file_emotion.append('fear')
    elif part[2] == 'HAP':
        file_emotion.append('happy')
    elif part[2] == 'NEU':
        file_emotion.append('neutral')
    else:
        file_emotion.append('Unknown')
        
# dataframe for emotion of files
emotion_df = pd.DataFrame(file_emotion, columns=['Emotions'])

# dataframe for path of files.
path_df = pd.DataFrame(file_path, columns=['Path'])
Crema_df = pd.concat([emotion_df, path_df], axis=1)
Crema_df.head()
Out[14]:
Emotions Path
0 angry ./kaggle/input/cremad/AudioWAV/1001_DFA_ANG_XX...
1 disgust ./kaggle/input/cremad/AudioWAV/1001_DFA_DIS_XX...
2 fear ./kaggle/input/cremad/AudioWAV/1001_DFA_FEA_XX...
3 happy ./kaggle/input/cremad/AudioWAV/1001_DFA_HAP_XX...
4 neutral ./kaggle/input/cremad/AudioWAV/1001_DFA_NEU_XX...
In [15]:
tess_directory_list = os.listdir(Tess)

file_emotion = []
file_path = []

for dir in tess_directory_list:
    directories = os.listdir(Tess + dir)
    for file in directories:
        part = file.split('.')[0]
        part = part.split('_')[2]
        if part=='ps':
            file_emotion.append('surprise')
        else:
            file_emotion.append(part)
        file_path.append(Tess + dir + '/' + file)
        
# dataframe for emotion of files
emotion_df = pd.DataFrame(file_emotion, columns=['Emotions'])

# dataframe for path of files.
path_df = pd.DataFrame(file_path, columns=['Path'])
Tess_df = pd.concat([emotion_df, path_df], axis=1)
Tess_df.head()
Out[15]:
Emotions Path
0 angry ./kaggle/input/toronto-emotional-speech-set-te...
1 angry ./kaggle/input/toronto-emotional-speech-set-te...
2 angry ./kaggle/input/toronto-emotional-speech-set-te...
3 angry ./kaggle/input/toronto-emotional-speech-set-te...
4 angry ./kaggle/input/toronto-emotional-speech-set-te...
In [16]:
savee_directory_list = os.listdir(Savee)

file_emotion = []
file_path = []

for file in savee_directory_list:
    file_path.append(Savee + file)
    part = file.split('_')[1]
    ele = part[:-6]
    if ele=='a':
        file_emotion.append('angry')
    elif ele=='d':
        file_emotion.append('disgust')
    elif ele=='f':
        file_emotion.append('fear')
    elif ele=='h':
        file_emotion.append('happy')
    elif ele=='n':
        file_emotion.append('neutral')
    elif ele=='sa':
        file_emotion.append('sad')
    else:
        file_emotion.append('surprise')
        
# dataframe for emotion of files
emotion_df = pd.DataFrame(file_emotion, columns=['Emotions'])

# dataframe for path of files.
path_df = pd.DataFrame(file_path, columns=['Path'])
Savee_df = pd.concat([emotion_df, path_df], axis=1)
Savee_df.head()
Out[16]:
Emotions Path
0 angry ./kaggle/input/surrey-audiovisual-expressed-em...
1 angry ./kaggle/input/surrey-audiovisual-expressed-em...
2 angry ./kaggle/input/surrey-audiovisual-expressed-em...
3 angry ./kaggle/input/surrey-audiovisual-expressed-em...
4 angry ./kaggle/input/surrey-audiovisual-expressed-em...
In [17]:
data_path = pd.concat([Ravdess_df, Crema_df, Tess_df, Savee_df], axis = 0)
data_path.to_csv("data_path.csv",index=False)
data_path.head()
Out[17]:
Emotions Path
0 neutral ./kaggle/input/ravdess-emotional-speech-audio/...
1 neutral ./kaggle/input/ravdess-emotional-speech-audio/...
2 neutral ./kaggle/input/ravdess-emotional-speech-audio/...
3 neutral ./kaggle/input/ravdess-emotional-speech-audio/...
4 calm ./kaggle/input/ravdess-emotional-speech-audio/...
In [18]:
plt.title('Count of Emotions', size=16)
sns.countplot(data_path.Emotions)
plt.ylabel('Count', size=12)
plt.xlabel('Emotions', size=12)
sns.despine(top=True, right=True, left=False, bottom=False)
plt.show()
No description has been provided for this image
In [19]:
def create_waveplot(data, sr, e):
    plt.figure(figsize=(10, 3))
    plt.title('Waveplot for audio with {} emotion'.format(e), size=15)
    librosa.display.waveplot(data, sr=sr)
    plt.show()

def create_spectrogram(data, sr, e):
    # stft function converts the data into short term fourier transform
    X = librosa.stft(data)
    Xdb = librosa.amplitude_to_db(abs(X))
    plt.figure(figsize=(12, 3))
    plt.title('Spectrogram for audio with {} emotion'.format(e), size=15)
    librosa.display.specshow(Xdb, sr=sr, x_axis='time', y_axis='hz')   
    #librosa.display.specshow(Xdb, sr=sr, x_axis='time', y_axis='log')
    plt.colorbar()
In [20]:
# emotion='fear'
# path = np.array(data_path.Path[data_path.Emotions==emotion])[1]
# data, sampling_rate = librosa.load(path)
# create_waveplot(data, sampling_rate, emotion)
# create_spectrogram(data, sampling_rate, emotion)
# Audio(path)
import librosa
import librosa.display
import matplotlib.pyplot as plt

def create_waveplot(data, sr, e):
    plt.figure(figsize=(10, 3))
    plt.title('Waveplot for audio with {} emotion'.format(e), size=15)
    plt.plot(data)
    plt.show()

emotion='fear'
path = np.array(data_path.Path[data_path.Emotions==emotion])[1]
data, sampling_rate = librosa.load(path)
create_waveplot(data, sampling_rate, emotion)
create_spectrogram(data, sampling_rate, emotion)
Audio(path)
No description has been provided for this image
Out[20]:
Your browser does not support the audio element.
No description has been provided for this image
In [21]:
emotion='angry'
path = np.array(data_path.Path[data_path.Emotions==emotion])[1]
data, sampling_rate = librosa.load(path)
create_waveplot(data, sampling_rate, emotion)
create_spectrogram(data, sampling_rate, emotion)
Audio(path)
No description has been provided for this image
Out[21]:
Your browser does not support the audio element.
No description has been provided for this image
In [22]:
emotion='sad'
path = np.array(data_path.Path[data_path.Emotions==emotion])[1]
data, sampling_rate = librosa.load(path)
create_waveplot(data, sampling_rate, emotion)
create_spectrogram(data, sampling_rate, emotion)
Audio(path)
No description has been provided for this image
Out[22]:
Your browser does not support the audio element.
No description has been provided for this image
In [23]:
emotion='happy'
path = np.array(data_path.Path[data_path.Emotions==emotion])[1]
data, sampling_rate = librosa.load(path)
create_waveplot(data, sampling_rate, emotion)
create_spectrogram(data, sampling_rate, emotion)
Audio(path)
No description has been provided for this image
Out[23]:
Your browser does not support the audio element.
No description has been provided for this image
In [24]:
def noise(data):
    noise_amp = 0.035*np.random.uniform()*np.amax(data)
    data = data + noise_amp*np.random.normal(size=data.shape[0])
    return data

def stretch(data, rate=0.8):
    return librosa.effects.time_stretch(data, rate)

def shift(data):
    shift_range = int(np.random.uniform(low=-5, high = 5)*1000)
    return np.roll(data, shift_range)

def pitch(data, sampling_rate, pitch_factor=0.7):
    return librosa.effects.pitch_shift(data, sampling_rate, pitch_factor)

# taking any example and checking for techniques.
path = np.array(data_path.Path)[1]
data, sample_rate = librosa.load(path)
In [25]:
# plt.figure(figsize=(14,4))
# librosa.display.waveplot(y=data, sr=sample_rate)
# Audio(path)
import librosa
import matplotlib.pyplot as plt
from IPython.display import Audio

plt.figure(figsize=(14, 4))
plt.plot(data)  # Assuming data is your audio signal
plt.xlabel('Time (samples)')
plt.ylabel('Amplitude')
plt.title('Audio waveform')
plt.show()

Audio(path)  # Assuming path is the path to your audio file
No description has been provided for this image
Out[25]:
Your browser does not support the audio element.
In [26]:
# x = noise(data)
# plt.figure(figsize=(14,4))
# librosa.display.waveplot(y=x, sr=sample_rate)
# Audio(x, rate=sample_rate)
import librosa
import matplotlib.pyplot as plt
from IPython.display import Audio
import numpy as np

def noise(data, factor=0.005):
    noise = np.random.normal(0, 1, len(data))
    return data + factor * noise

x = noise(data)
plt.figure(figsize=(14, 4))
plt.plot(x)  # Assuming x is your noisy audio signal
plt.xlabel('Time (samples)')
plt.ylabel('Amplitude')
plt.title('Noisy Audio waveform')
plt.show()

Audio(x, rate=sample_rate)  # Assuming sample_rate is the sample rate of your audio
No description has been provided for this image
Out[26]:
Your browser does not support the audio element.
In [27]:
import librosa
import matplotlib.pyplot as plt
from IPython.display import Audio

def stretch(data, rate=0.8):
    return librosa.effects.time_stretch(data, rate=rate)  # Specify rate as a keyword argument

x = stretch(data)
plt.figure(figsize=(14, 4))
plt.plot(x)
plt.xlabel('Time (samples)')
plt.ylabel('Amplitude')
plt.title('Stretched Audio waveform')
plt.show()

Audio(x, rate=sample_rate)
No description has been provided for this image
Out[27]:
Your browser does not support the audio element.
In [28]:
# x = shift(data)
# plt.figure(figsize=(14,4))
# librosa.display.waveplot(y=x, sr=sample_rate)
# Audio(x, rate=sample_rate)
import librosa
import matplotlib.pyplot as plt
from IPython.display import Audio

def shift(data, shift_amount=500):
    return np.roll(data, shift_amount)

x = shift(data)
plt.figure(figsize=(14, 4))
plt.plot(x)
plt.xlabel('Time (samples)')
plt.ylabel('Amplitude')
plt.title('Shifted Audio waveform')
plt.show()

Audio(x, rate=sample_rate)
No description has been provided for this image
Out[28]:
Your browser does not support the audio element.
In [29]:
import librosa
import matplotlib.pyplot as plt
from IPython.display import Audio

def pitch(data, sampling_rate, pitch_factor=0.7):
    return librosa.effects.pitch_shift(data, sr=sampling_rate, n_steps=int(pitch_factor * 100))

x = pitch(data, sample_rate)
plt.figure(figsize=(14, 4))
plt.plot(x)
plt.xlabel('Time (samples)')
plt.ylabel('Amplitude')
plt.title('Pitch-shifted Audio waveform')
plt.show()

Audio(x, rate=sample_rate)
No description has been provided for this image
Out[29]:
Your browser does not support the audio element.
In [30]:
def extract_features(data):
    # ZCR
    result = np.array([])
    zcr = np.mean(librosa.feature.zero_crossing_rate(y=data).T, axis=0)
    result=np.hstack((result, zcr)) # stacking horizontally

    # Chroma_stft
    stft = np.abs(librosa.stft(data))
    chroma_stft = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0)
    result = np.hstack((result, chroma_stft)) # stacking horizontally

    # MFCC
    mfcc = np.mean(librosa.feature.mfcc(y=data, sr=sample_rate).T, axis=0)
    result = np.hstack((result, mfcc)) # stacking horizontally

    # Root Mean Square Value
    rms = np.mean(librosa.feature.rms(y=data).T, axis=0)
    result = np.hstack((result, rms)) # stacking horizontally

    # MelSpectogram
    mel = np.mean(librosa.feature.melspectrogram(y=data, sr=sample_rate).T, axis=0)
    result = np.hstack((result, mel)) # stacking horizontally
    
    return result

def get_features(path):
    # duration and offset are used to take care of the no audio in start and the ending of each audio files as seen above.
    data, sample_rate = librosa.load(path, duration=2.5, offset=0.6)
    
    # without augmentation
    res1 = extract_features(data)
    result = np.array(res1)
    
    # data with noise
    noise_data = noise(data)
    res2 = extract_features(noise_data)
    result = np.vstack((result, res2)) # stacking vertically
    
    # data with stretching and pitching
    new_data = stretch(data)
    data_stretch_pitch = pitch(new_data, sample_rate)
    res3 = extract_features(data_stretch_pitch)
    result = np.vstack((result, res3)) # stacking vertically
    
    return result
In [46]:
data_path = pd.concat([Ravdess_df, Crema_df, Tess_df, Savee_df], axis = 0)
data_path.to_csv("data_path.csv",index=False)
data_path[600:675]
total_rows = len(data_path)

print("Total number of rows in data_path.csv:", total_rows)
Total number of rows in data_path.csv: 12162
In [63]:
X, Y = [], []
for path, emotion in zip(data_path.Path, data_path.Emotions):
    feature = get_features(path)
    for ele in feature:
        X.append(ele)
        # appending emotion 3 times as we have made 3 augmentation techniques on each audio file.
        Y.append(emotion)
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
Cell In[63], line 3
      1 X, Y = [], []
      2 for path, emotion in zip(data_path.Path, data_path.Emotions):
----> 3     feature = get_features(path)
      4     for ele in feature:
      5         X.append(ele)

Cell In[30], line 41, in get_features(path)
     39 # data with stretching and pitching
     40 new_data = stretch(data)
---> 41 data_stretch_pitch = pitch(new_data, sample_rate)
     42 res3 = extract_features(data_stretch_pitch)
     43 result = np.vstack((result, res3)) # stacking vertically

Cell In[29], line 6, in pitch(data, sampling_rate, pitch_factor)
      5 def pitch(data, sampling_rate, pitch_factor=0.7):
----> 6     return librosa.effects.pitch_shift(data, sr=sampling_rate, n_steps=int(pitch_factor * 100))

File ~\AppData\Local\Programs\Python\Python312\Lib\site-packages\librosa\effects.py:332, in pitch_shift(y, sr, n_steps, bins_per_octave, res_type, scale, **kwargs)
    328 rate = 2.0 ** (-float(n_steps) / bins_per_octave)
    330 # Stretch in time, then resample
    331 y_shift = core.resample(
--> 332     time_stretch(y, rate=rate, **kwargs),
    333     orig_sr=float(sr) / rate,
    334     target_sr=sr,
    335     res_type=res_type,
    336     scale=scale,
    337 )
    339 # Crop to the same dimension as the input
    340 return util.fix_length(y_shift, size=y.shape[-1])

File ~\AppData\Local\Programs\Python\Python312\Lib\site-packages\librosa\effects.py:248, in time_stretch(y, rate, **kwargs)
    245 len_stretch = int(round(y.shape[-1] / rate))
    247 # Invert the STFT
--> 248 y_stretch = core.istft(stft_stretch, dtype=y.dtype, length=len_stretch, **kwargs)
    250 return y_stretch

File ~\AppData\Local\Programs\Python\Python312\Lib\site-packages\librosa\core\spectrum.py:589, in istft(stft_matrix, hop_length, win_length, n_fft, window, center, dtype, length, out)
    586 bl_t = min(bl_s + n_columns, n_frames)
    588 # invert the block and apply the window function
--> 589 ytmp = ifft_window * fft.irfft(stft_matrix[..., bl_s:bl_t], n=n_fft, axis=-2)
    591 # Overlap-add the istft block starting at the i'th frame
    592 __overlap_add(y[..., frame * hop_length + offset :], ytmp, hop_length)

File ~\AppData\Local\Programs\Python\Python312\Lib\site-packages\numpy\fft\_pocketfft.py:513, in irfft(a, n, axis, norm)
    511     n = (a.shape[axis] - 1) * 2
    512 inv_norm = _get_backward_norm(n, norm)
--> 513 output = _raw_fft(a, n, axis, True, False, inv_norm)
    514 return output

File ~\AppData\Local\Programs\Python\Python312\Lib\site-packages\numpy\fft\_pocketfft.py:73, in _raw_fft(a, n, axis, is_real, is_forward, inv_norm)
     71 else:
     72     a = swapaxes(a, axis, -1)
---> 73     r = pfi.execute(a, is_real, is_forward, fct)
     74     r = swapaxes(r, axis, -1)
     75 return r

KeyboardInterrupt: 
In [64]:
len(X), len(Y), data_path.Path.shape
Out[64]:
(1986, 1986, (12162,))
In [65]:
Features = pd.DataFrame(X)
Features['labels'] = Y
Features.to_csv('features.csv', index=False)
Features.head()
Out[65]:
0 1 2 3 4 5 6 7 8 9 ... 153 154 155 156 157 158 159 160 161 labels
0 0.321275 0.729664 0.750033 0.730624 0.735275 0.713529 0.660531 0.684966 0.733049 0.753972 ... 0.000004 0.000003 0.000002 0.000002 0.000005 0.000008 0.000007 0.000005 4.245834e-07 neutral
1 0.413755 0.884037 0.897497 0.899011 0.899051 0.881126 0.752190 0.722764 0.772003 0.818910 ... 0.001932 0.001748 0.001736 0.001750 0.001715 0.001871 0.001749 0.001632 1.710952e-03 neutral
2 0.490643 0.271094 0.255352 0.189282 0.290072 0.393606 0.335324 0.262065 0.382307 0.578054 ... 0.000055 0.000036 0.000007 0.000001 0.000009 0.000013 0.000011 0.000006 8.026328e-07 neutral
3 0.293566 0.673896 0.722096 0.723508 0.682302 0.680533 0.675352 0.628977 0.679179 0.707283 ... 0.000007 0.000007 0.000007 0.000007 0.000012 0.000010 0.000011 0.000006 4.254087e-07 neutral
4 0.414596 0.840906 0.881445 0.881813 0.871292 0.884976 0.804859 0.723588 0.751556 0.773561 ... 0.001784 0.001805 0.001766 0.001708 0.001776 0.001758 0.001801 0.001782 1.796141e-03 neutral

5 rows × 163 columns

In [66]:
X = Features.iloc[: ,:-1].values
Y = Features['labels'].values
In [67]:
encoder = OneHotEncoder()
Y = encoder.fit_transform(np.array(Y).reshape(-1,1)).toarray()
In [68]:
x_train, x_test, y_train, y_test = train_test_split(X, Y, random_state=0, shuffle=True)
x_train.shape, y_train.shape, x_test.shape, y_test.shape
Out[68]:
((1489, 162), (1489, 8), (497, 162), (497, 8))
In [69]:
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)
x_train.shape, y_train.shape, x_test.shape, y_test.shape
Out[69]:
((1489, 162), (1489, 8), (497, 162), (497, 8))
In [70]:
x_train = np.expand_dims(x_train, axis=2)
x_test = np.expand_dims(x_test, axis=2)
x_train.shape, y_train.shape, x_test.shape, y_test.shape
Out[70]:
((1489, 162, 1), (1489, 8), (497, 162, 1), (497, 8))
In [71]:
model=Sequential()
model.add(Conv1D(256, kernel_size=5, strides=1, padding='same', activation='relu', input_shape=(x_train.shape[1], 1)))
model.add(MaxPooling1D(pool_size=5, strides = 2, padding = 'same'))

model.add(Conv1D(256, kernel_size=5, strides=1, padding='same', activation='relu'))
model.add(MaxPooling1D(pool_size=5, strides = 2, padding = 'same'))

model.add(Conv1D(128, kernel_size=5, strides=1, padding='same', activation='relu'))
model.add(MaxPooling1D(pool_size=5, strides = 2, padding = 'same'))
model.add(Dropout(0.2))

model.add(Conv1D(64, kernel_size=5, strides=1, padding='same', activation='relu'))
model.add(MaxPooling1D(pool_size=5, strides = 2, padding = 'same'))

model.add(Flatten())
model.add(Dense(units=32, activation='relu'))
model.add(Dropout(0.3))

model.add(Dense(units=8, activation='softmax'))
model.compile(optimizer = 'adam' , loss = 'categorical_crossentropy' , metrics = ['accuracy'])

model.summary()
Model: "sequential_1"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓
┃ Layer (type)                         ┃ Output Shape                ┃         Param # ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩
│ conv1d_4 (Conv1D)                    │ (None, 162, 256)            │           1,536 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ max_pooling1d_4 (MaxPooling1D)       │ (None, 81, 256)             │               0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ conv1d_5 (Conv1D)                    │ (None, 81, 256)             │         327,936 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ max_pooling1d_5 (MaxPooling1D)       │ (None, 41, 256)             │               0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ conv1d_6 (Conv1D)                    │ (None, 41, 128)             │         163,968 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ max_pooling1d_6 (MaxPooling1D)       │ (None, 21, 128)             │               0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ dropout_2 (Dropout)                  │ (None, 21, 128)             │               0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ conv1d_7 (Conv1D)                    │ (None, 21, 64)              │          41,024 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ max_pooling1d_7 (MaxPooling1D)       │ (None, 11, 64)              │               0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ flatten_1 (Flatten)                  │ (None, 704)                 │               0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ dense_2 (Dense)                      │ (None, 32)                  │          22,560 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ dropout_3 (Dropout)                  │ (None, 32)                  │               0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ dense_3 (Dense)                      │ (None, 8)                   │             264 │
└──────────────────────────────────────┴─────────────────────────────┴─────────────────┘
 Total params: 557,288 (2.13 MB)
 Trainable params: 557,288 (2.13 MB)
 Non-trainable params: 0 (0.00 B)
In [72]:
rlrp = ReduceLROnPlateau(monitor='loss', factor=0.4, verbose=0, patience=2, min_lr=0.0000001)
history=model.fit(x_train, y_train, batch_size=64, epochs=50, validation_data=(x_test, y_test), callbacks=[rlrp])
Epoch 1/50
24/24 ━━━━━━━━━━━━━━━━━━━━ 5s 82ms/step - accuracy: 0.1356 - loss: 2.0409 - val_accuracy: 0.1771 - val_loss: 1.9933 - learning_rate: 0.0010
Epoch 2/50
24/24 ━━━━━━━━━━━━━━━━━━━━ 2s 62ms/step - accuracy: 0.1769 - loss: 1.9990 - val_accuracy: 0.2052 - val_loss: 1.9769 - learning_rate: 0.0010
Epoch 3/50
24/24 ━━━━━━━━━━━━━━━━━━━━ 2s 61ms/step - accuracy: 0.1803 - loss: 1.9844 - val_accuracy: 0.2093 - val_loss: 1.9597 - learning_rate: 0.0010
Epoch 4/50
24/24 ━━━━━━━━━━━━━━━━━━━━ 2s 61ms/step - accuracy: 0.2117 - loss: 1.9453 - val_accuracy: 0.2394 - val_loss: 1.9477 - learning_rate: 0.0010
Epoch 5/50
24/24 ━━━━━━━━━━━━━━━━━━━━ 1s 50ms/step - accuracy: 0.2201 - loss: 1.9541 - val_accuracy: 0.2394 - val_loss: 1.9320 - learning_rate: 0.0010
Epoch 6/50
24/24 ━━━━━━━━━━━━━━━━━━━━ 1s 47ms/step - accuracy: 0.2111 - loss: 1.9338 - val_accuracy: 0.2374 - val_loss: 1.8988 - learning_rate: 0.0010
Epoch 7/50
24/24 ━━━━━━━━━━━━━━━━━━━━ 1s 50ms/step - accuracy: 0.2674 - loss: 1.8923 - val_accuracy: 0.2475 - val_loss: 1.8898 - learning_rate: 0.0010
Epoch 8/50
24/24 ━━━━━━━━━━━━━━━━━━━━ 1s 47ms/step - accuracy: 0.2332 - loss: 1.8871 - val_accuracy: 0.2676 - val_loss: 1.8596 - learning_rate: 0.0010
Epoch 9/50
24/24 ━━━━━━━━━━━━━━━━━━━━ 1s 47ms/step - accuracy: 0.2745 - loss: 1.8655 - val_accuracy: 0.2636 - val_loss: 1.8949 - learning_rate: 0.0010
Epoch 10/50
24/24 ━━━━━━━━━━━━━━━━━━━━ 1s 49ms/step - accuracy: 0.2772 - loss: 1.8602 - val_accuracy: 0.2716 - val_loss: 1.8254 - learning_rate: 0.0010
Epoch 11/50
24/24 ━━━━━━━━━━━━━━━━━━━━ 1s 50ms/step - accuracy: 0.2686 - loss: 1.8505 - val_accuracy: 0.2777 - val_loss: 1.8288 - learning_rate: 0.0010
Epoch 12/50
24/24 ━━━━━━━━━━━━━━━━━━━━ 1s 48ms/step - accuracy: 0.2827 - loss: 1.8022 - val_accuracy: 0.2797 - val_loss: 1.8532 - learning_rate: 0.0010
Epoch 13/50
24/24 ━━━━━━━━━━━━━━━━━━━━ 1s 49ms/step - accuracy: 0.2688 - loss: 1.8065 - val_accuracy: 0.2897 - val_loss: 1.8025 - learning_rate: 0.0010
Epoch 14/50
24/24 ━━━━━━━━━━━━━━━━━━━━ 1s 47ms/step - accuracy: 0.2999 - loss: 1.7864 - val_accuracy: 0.2918 - val_loss: 1.7962 - learning_rate: 0.0010
Epoch 15/50
24/24 ━━━━━━━━━━━━━━━━━━━━ 1s 47ms/step - accuracy: 0.2919 - loss: 1.7617 - val_accuracy: 0.3159 - val_loss: 1.7643 - learning_rate: 0.0010
Epoch 16/50
24/24 ━━━━━━━━━━━━━━━━━━━━ 1s 48ms/step - accuracy: 0.3024 - loss: 1.7525 - val_accuracy: 0.3219 - val_loss: 1.7750 - learning_rate: 0.0010
Epoch 17/50
24/24 ━━━━━━━━━━━━━━━━━━━━ 1s 48ms/step - accuracy: 0.2830 - loss: 1.7646 - val_accuracy: 0.3119 - val_loss: 1.7698 - learning_rate: 0.0010
Epoch 18/50
24/24 ━━━━━━━━━━━━━━━━━━━━ 1s 51ms/step - accuracy: 0.3070 - loss: 1.7253 - val_accuracy: 0.3139 - val_loss: 1.7468 - learning_rate: 4.0000e-04
Epoch 19/50
24/24 ━━━━━━━━━━━━━━━━━━━━ 1s 49ms/step - accuracy: 0.3407 - loss: 1.6729 - val_accuracy: 0.3360 - val_loss: 1.7182 - learning_rate: 4.0000e-04
Epoch 20/50
24/24 ━━━━━━━━━━━━━━━━━━━━ 1s 48ms/step - accuracy: 0.3289 - loss: 1.7103 - val_accuracy: 0.3300 - val_loss: 1.7048 - learning_rate: 4.0000e-04
Epoch 21/50
24/24 ━━━━━━━━━━━━━━━━━━━━ 1s 48ms/step - accuracy: 0.3381 - loss: 1.6660 - val_accuracy: 0.3280 - val_loss: 1.7087 - learning_rate: 4.0000e-04
Epoch 22/50
24/24 ━━━━━━━━━━━━━━━━━━━━ 1s 50ms/step - accuracy: 0.3491 - loss: 1.6526 - val_accuracy: 0.3421 - val_loss: 1.7095 - learning_rate: 4.0000e-04
Epoch 23/50
24/24 ━━━━━━━━━━━━━━━━━━━━ 1s 47ms/step - accuracy: 0.3470 - loss: 1.6416 - val_accuracy: 0.3421 - val_loss: 1.6968 - learning_rate: 4.0000e-04
Epoch 24/50
24/24 ━━━━━━━━━━━━━━━━━━━━ 1s 51ms/step - accuracy: 0.3654 - loss: 1.6342 - val_accuracy: 0.3461 - val_loss: 1.7091 - learning_rate: 4.0000e-04
Epoch 25/50
24/24 ━━━━━━━━━━━━━━━━━━━━ 1s 49ms/step - accuracy: 0.3865 - loss: 1.6072 - val_accuracy: 0.3783 - val_loss: 1.6763 - learning_rate: 4.0000e-04
Epoch 26/50
24/24 ━━━━━━━━━━━━━━━━━━━━ 1s 50ms/step - accuracy: 0.4052 - loss: 1.5322 - val_accuracy: 0.3521 - val_loss: 1.6809 - learning_rate: 4.0000e-04
Epoch 27/50
24/24 ━━━━━━━━━━━━━━━━━━━━ 1s 52ms/step - accuracy: 0.3683 - loss: 1.5950 - val_accuracy: 0.3622 - val_loss: 1.6869 - learning_rate: 4.0000e-04
Epoch 28/50
24/24 ━━━━━━━━━━━━━━━━━━━━ 1s 52ms/step - accuracy: 0.3854 - loss: 1.5616 - val_accuracy: 0.3783 - val_loss: 1.6793 - learning_rate: 4.0000e-04
Epoch 29/50
24/24 ━━━━━━━━━━━━━━━━━━━━ 1s 49ms/step - accuracy: 0.4058 - loss: 1.5451 - val_accuracy: 0.3984 - val_loss: 1.6689 - learning_rate: 4.0000e-04
Epoch 30/50
24/24 ━━━━━━━━━━━━━━━━━━━━ 1s 50ms/step - accuracy: 0.4048 - loss: 1.5110 - val_accuracy: 0.3843 - val_loss: 1.6769 - learning_rate: 4.0000e-04
Epoch 31/50
24/24 ━━━━━━━━━━━━━━━━━━━━ 1s 50ms/step - accuracy: 0.4175 - loss: 1.5559 - val_accuracy: 0.3843 - val_loss: 1.6726 - learning_rate: 4.0000e-04
Epoch 32/50
24/24 ━━━━━━━━━━━━━━━━━━━━ 1s 50ms/step - accuracy: 0.4470 - loss: 1.4513 - val_accuracy: 0.3924 - val_loss: 1.6718 - learning_rate: 4.0000e-04
Epoch 33/50
24/24 ━━━━━━━━━━━━━━━━━━━━ 1s 48ms/step - accuracy: 0.4275 - loss: 1.4480 - val_accuracy: 0.3682 - val_loss: 1.6634 - learning_rate: 4.0000e-04
Epoch 34/50
24/24 ━━━━━━━━━━━━━━━━━━━━ 1s 50ms/step - accuracy: 0.4312 - loss: 1.4856 - val_accuracy: 0.3682 - val_loss: 1.7018 - learning_rate: 4.0000e-04
Epoch 35/50
24/24 ━━━━━━━━━━━━━━━━━━━━ 1s 51ms/step - accuracy: 0.4417 - loss: 1.4801 - val_accuracy: 0.3984 - val_loss: 1.6345 - learning_rate: 4.0000e-04
Epoch 36/50
24/24 ━━━━━━━━━━━━━━━━━━━━ 1s 48ms/step - accuracy: 0.4592 - loss: 1.4084 - val_accuracy: 0.4064 - val_loss: 1.6561 - learning_rate: 4.0000e-04
Epoch 37/50
24/24 ━━━━━━━━━━━━━━━━━━━━ 1s 49ms/step - accuracy: 0.4588 - loss: 1.4434 - val_accuracy: 0.4004 - val_loss: 1.6434 - learning_rate: 4.0000e-04
Epoch 38/50
24/24 ━━━━━━━━━━━━━━━━━━━━ 1s 49ms/step - accuracy: 0.4531 - loss: 1.3943 - val_accuracy: 0.3964 - val_loss: 1.6604 - learning_rate: 4.0000e-04
Epoch 39/50
24/24 ━━━━━━━━━━━━━━━━━━━━ 1s 49ms/step - accuracy: 0.4582 - loss: 1.3680 - val_accuracy: 0.4004 - val_loss: 1.6628 - learning_rate: 4.0000e-04
Epoch 40/50
24/24 ━━━━━━━━━━━━━━━━━━━━ 1s 51ms/step - accuracy: 0.5020 - loss: 1.3477 - val_accuracy: 0.4366 - val_loss: 1.6496 - learning_rate: 4.0000e-04
Epoch 41/50
24/24 ━━━━━━━━━━━━━━━━━━━━ 1s 49ms/step - accuracy: 0.4790 - loss: 1.3586 - val_accuracy: 0.4145 - val_loss: 1.6427 - learning_rate: 4.0000e-04
Epoch 42/50
24/24 ━━━━━━━━━━━━━━━━━━━━ 1s 52ms/step - accuracy: 0.4756 - loss: 1.3570 - val_accuracy: 0.4266 - val_loss: 1.6280 - learning_rate: 4.0000e-04
Epoch 43/50
24/24 ━━━━━━━━━━━━━━━━━━━━ 1s 49ms/step - accuracy: 0.5077 - loss: 1.2930 - val_accuracy: 0.4185 - val_loss: 1.6678 - learning_rate: 4.0000e-04
Epoch 44/50
24/24 ━━━━━━━━━━━━━━━━━━━━ 1s 50ms/step - accuracy: 0.4973 - loss: 1.3144 - val_accuracy: 0.4366 - val_loss: 1.6394 - learning_rate: 4.0000e-04
Epoch 45/50
24/24 ━━━━━━━━━━━━━━━━━━━━ 1s 51ms/step - accuracy: 0.5019 - loss: 1.2759 - val_accuracy: 0.4064 - val_loss: 1.6664 - learning_rate: 4.0000e-04
Epoch 46/50
24/24 ━━━━━━━━━━━━━━━━━━━━ 1s 52ms/step - accuracy: 0.5279 - loss: 1.2461 - val_accuracy: 0.4346 - val_loss: 1.6333 - learning_rate: 4.0000e-04
Epoch 47/50
24/24 ━━━━━━━━━━━━━━━━━━━━ 1s 51ms/step - accuracy: 0.5367 - loss: 1.2632 - val_accuracy: 0.4225 - val_loss: 1.6363 - learning_rate: 4.0000e-04
Epoch 48/50
24/24 ━━━━━━━━━━━━━━━━━━━━ 1s 51ms/step - accuracy: 0.5051 - loss: 1.2754 - val_accuracy: 0.4366 - val_loss: 1.6292 - learning_rate: 1.6000e-04
Epoch 49/50
24/24 ━━━━━━━━━━━━━━━━━━━━ 1s 50ms/step - accuracy: 0.5360 - loss: 1.2082 - val_accuracy: 0.4306 - val_loss: 1.6519 - learning_rate: 1.6000e-04
Epoch 50/50
24/24 ━━━━━━━━━━━━━━━━━━━━ 1s 50ms/step - accuracy: 0.5767 - loss: 1.1809 - val_accuracy: 0.4286 - val_loss: 1.6361 - learning_rate: 1.6000e-04
In [73]:
print("Accuracy of our model on test data : " , model.evaluate(x_test,y_test)[1]*100 , "%")

epochs = [i for i in range(50)]
fig , ax = plt.subplots(1,2)
train_acc = history.history['accuracy']
train_loss = history.history['loss']
test_acc = history.history['val_accuracy']
test_loss = history.history['val_loss']

fig.set_size_inches(20,6)
ax[0].plot(epochs , train_loss , label = 'Training Loss')
ax[0].plot(epochs , test_loss , label = 'Testing Loss')
ax[0].set_title('Training & Testing Loss')
ax[0].legend()
ax[0].set_xlabel("Epochs")

ax[1].plot(epochs , train_acc , label = 'Training Accuracy')
ax[1].plot(epochs , test_acc , label = 'Testing Accuracy')
ax[1].set_title('Training & Testing Accuracy')
ax[1].legend()
ax[1].set_xlabel("Epochs")
plt.show()
16/16 ━━━━━━━━━━━━━━━━━━━━ 0s 21ms/step - accuracy: 0.4288 - loss: 1.6298
Accuracy of our model on test data :  42.85714328289032 %
No description has been provided for this image
In [74]:
pred_test = model.predict(x_test)
y_pred = encoder.inverse_transform(pred_test)

y_test = encoder.inverse_transform(y_test)
16/16 ━━━━━━━━━━━━━━━━━━━━ 1s 29ms/step
In [77]:
df = pd.DataFrame(columns=['Predicted Labels', 'Actual Labels'])
df['Predicted Labels'] = y_pred.flatten()
df['Actual Labels'] = y_test.flatten()

df[5:15]
Out[77]:
Predicted Labels Actual Labels
5 fear fear
6 surprise surprise
7 disgust disgust
8 fear fear
9 calm calm
10 sad surprise
11 disgust sad
12 surprise fear
13 sad calm
14 calm disgust
In [78]:
cm = confusion_matrix(y_test, y_pred)
plt.figure(figsize = (12, 10))
cm = pd.DataFrame(cm , index = [i for i in encoder.categories_] , columns = [i for i in encoder.categories_])
sns.heatmap(cm, linecolor='white', cmap='Blues', linewidth=1, annot=True, fmt='')
plt.title('Confusion Matrix', size=20)
plt.xlabel('Predicted Labels', size=14)
plt.ylabel('Actual Labels', size=14)
plt.show()
No description has been provided for this image
In [79]:
print(classification_report(y_test, y_pred))
              precision    recall  f1-score   support

       angry       0.60      0.48      0.54        66
        calm       0.54      0.68      0.60        66
     disgust       0.34      0.48      0.40        64
        fear       0.49      0.40      0.44        72
       happy       0.51      0.38      0.44        65
     neutral       0.14      0.03      0.05        33
         sad       0.23      0.15      0.18        67
    surprise       0.36      0.62      0.45        64

    accuracy                           0.43       497
   macro avg       0.40      0.41      0.39       497
weighted avg       0.42      0.43      0.41       497

In [ ]: